Checking Species Abundance and Biomass for Previous Works

While investigating some discrepancies among different datasets a handful of corrections were made. This document is intended to see if any of those changes have important implications for some of the species-specific reports that have been done recently.

The three datasets being compared here are the survdat_Nye_allseasons.RData, Survdat_Nye_Aug_2020.RData, and the more recent NEFSC_BTS_all_seasons_03032021.RData.

Load SURVDAT Sources

# Cleanup functions
# source(here("R/01_nefsc_ss_build.R"))
source(here("R/01_nefsc_ss_build_nodrop.R"))

####  Survdat resource Load Data


# 2019 Data used in 2020 paper
load(paste0(nmfs_path, "Survdat_Nye_allseason.RData"))
survdat_nye  <- survdat %>% clean_names()

# Run cleanup
survdat_nye <- survdat_prep_nodrop(survdat = survdat_nye) %>% 
  mutate(survdat_source = "survdat_nye")




# 2020 data received last august
load(paste0(nmfs_path, "Survdat_Nye_Aug 2020.RData"))
survdat_20 <- clean_names(survdat) 


# Run cleanup
survdat_20 <- survdat_prep_nodrop(survdat = survdat_20) %>% 
  mutate(survdat_source = "survdat_nye2020")




# Data we just received in 2021 with errors located and corrected
load(paste0(nmfs_path, "2021_survdat/NEFSC_BTS_all_seasons_03032021.RData"))
survdat_21 <- survey$survdat %>% clean_names()

# # Same file but with additional bio columns from Sean
# load(paste0(nmfs_path, "2021_survdat/NEFSC_BTS_2021_bio_03192021.RData"))
# survdat_21 <- survey$survdat %>% clean_names()


# Run cleanup
survdat_21 <- survdat_prep_nodrop(survdat = survdat_21) %>% 
  mutate(survdat_source = "survdat_2021") 

rm(survdat, survey)

Load List of Published Species

####  Load the species list from Andrew
species_check <- read_csv(here("data/andrew_species/Assesmentfishspecies.csv"), 
                          col_types = cols())
species_check <- species_check %>% 
  clean_names() %>% 
  mutate(svspp = str_pad(svspp, 3, pad = "0", side = "left"),
         comname = tolower(comname),
         species = str_to_title(species)) %>% 
  arrange(svspp)


# Put in a list
source_list <- list("survdat_nye_2019" = survdat_nye,
                    "survdat_nye_2020" = survdat_20,
                    "survdat_2021"     = survdat_21)

Making Comparisons

In this code chunk the different sources are put together in a list, and the same cleanup steps are applied to each. The years are limited to before 2017 so there are no extra years in some not available in others. The SVSPP codes are also limited to just the ones in the list Andrew provided.

After that repeated records for abundance and biomass are dropped. These repeats exist to accommodate the length and number at length details. We don’t need them for this so they are dropped here.

# Make years comparable
# Filter species down for both

source_list <- map(source_list, function(survdat_data){
  
  # filter years
  sdat <- survdat_data %>% 
    filter(est_year >= 2008,
           est_year <= 2017,
           svspp %in% species_check$svspp)
  
  # Pull distinct station abundance and biomass for each species
  sdat <- sdat %>% 
    distinct(id, est_year, season, svvessel, 
             comname, svspp, catchsex, abundance, biom_adj)

  })


# Split each survdat set into a list of single species tables
source_splits <- map(source_list, function(source_data){
  source_split <- source_data %>% split(.$comname) })

To make comparisons between the different data sets I first check that the svspp code is present in the dataset. Then for each source I get a total annual value for biomass and abundance. Then across data sources I calculate the correlation between these values, and the percent change from one data source to the most recently revised data pull.

The table itself is retained in addition to some overall summary metrics.

# Make  Comparisons

# Vector of species and their common names, atleast the common names Andrew used
andrew_species <- species_check$comname %>% setNames(species_check$svspp)


# Pulling out details for the species
species_comparisons <- imap(andrew_species, function(species_comname, species_svspp){
  
  # there are some name mismatches in common name, so catch those and return an error 
  # that explains which data it was not in
  in_nye  <- species_comname %in% names(source_splits$survdat_nye_2019)
  in_20   <- species_comname %in% names(source_splits$survdat_nye_2020)
  in_21   <- species_comname %in% names(source_splits$survdat_2021)
  in_both <- in_nye & in_21
  in_all  <- in_nye & in_20 & in_21
  
  if(in_both == FALSE){
    return(list("in_nye"  = in_nye, 
                "in_21"   = in_21,
                "in_all"  = in_all,
                "data"    = data.frame(),
                "metrics" = data.frame())) }
  
  
  
  # Pull just that species from the 2019 data
  summ_19 <- source_list$survdat_nye_2019 %>% 
    filter(svspp == species_svspp) %>% 
    group_by(comname, est_year) %>% 
    summarise(abund_19 = sum(abundance, na.rm = T),
              biom_19  = sum(biom_adj, na.rm = T),
              .groups  = "keep") %>% ungroup()
  
  # same for 2020 data
  summ_20 <- source_list$survdat_nye_2020 %>% 
    filter(svspp == species_svspp) %>% 
    group_by(comname, est_year) %>% 
    summarise(abund_20 = sum(abundance, na.rm = T),
              biom_20  = sum(biom_adj, na.rm = T),
              .groups = "keep") %>% ungroup()
  
  
   # and 2021 data
  summ_21 <- source_list$survdat_2021 %>% 
    filter(svspp == species_svspp) %>% 
    group_by(comname, est_year) %>% 
    summarise(abund_21 = sum(abundance, na.rm = T),
              biom_21  = sum(biom_adj, na.rm = T),
              .groups = "keep") %>% ungroup()
  
  # join them for side-by-side data
  combined_data <- left_join(summ_19, summ_20, by = c("comname", "est_year")) %>% 
    left_join(summ_21, by = c("comname", "est_year")) %>% 
    mutate(
      # 2019-2021 comparison
      abund_change_19to21 = ((abund_21 - abund_19) / abund_19) * 100,
      biom_change_19to21  = ((biom_21 - biom_19) / biom_19) * 100,
      # 2020-2021 comparison
      abund_change_20to21 = ((abund_21 - abund_20) / abund_20) * 100,
      biom_change_20to21  = ((biom_21 - biom_20) / biom_20) * 100) 
  
  #return(combined_data)
  
  # abundance correlation
  abund_cor_19to21   <- cor(combined_data$abund_21, 
                            combined_data$abund_19, 
                            use = "pairwise.complete.obs")
  abund_cor_20to21   <- cor(combined_data$abund_21, 
                            combined_data$abund_20, 
                            use = "pairwise.complete.obs")
  
  # biomass correlation
  biomass_cor_19to21 <- cor(combined_data$biom_21, 
                            combined_data$biom_19,   
                            use = "pairwise.complete.obs")
  biomass_cor_20to21 <- cor(combined_data$biom_21, 
                            combined_data$biom_20,   
                            use = "pairwise.complete.obs")
  
  # average relative shift
  abund_shift_19to21 <- mean(combined_data$abund_change_19to21, na.rm = T)
  biom_shift_19to21  <- mean(combined_data$biom_change_19to21, na.rm = T)
  abund_shift_20to21 <- mean(combined_data$abund_change_20to21, na.rm = T)
  biom_shift_20to21  <- mean(combined_data$biom_change_20to21, na.rm = T)
  
  # put in list to export
  list("data" = combined_data,
       "metrics" = data.frame(
         "comname"            = species_comname,
         "svspp"              = species_svspp,
         "abund_corr_19to21"  = abund_cor_19to21,
         "abund_corr_20to21"  = abund_cor_20to21,
         "biom_corr_19to21"   = biomass_cor_19to21,
         "biom_corr_20to21"   = biomass_cor_20to21,
         "perc_abund_19to21"  = abund_shift_19to21,
         "perc_abund_20to21"  = abund_shift_20to21,
         "perc_biom_19to21"   = biom_shift_19to21,
         "perc_biom_20to21"   = biom_shift_20to21)
       )
  })

For simplicity I then append the tables together so they are single data frames and not lists.

# put data and metrics into a table
comparison_data    <- map_dfr(species_comparisons, ~.x[["data"]]) 
comparison_metrics <- map_dfr(species_comparisons, ~.x[["metrics"]]) 

Visualizing the three data sources Abundance/Biomass

As a quick visual reference, here is how total annual abundance and biomasses compare across data sources. The species have been split into groups to make paneling less crowded.

# How many species are there, and how many can we panel well - 84
#unique(andrew_species)

# make even number groups
even_groups <- list(
  group_1 = sort(andrew_species)[1:17],
  group_2 = sort(andrew_species)[18:34],
  group_3 = sort(andrew_species)[35:51],
  group_4 = sort(andrew_species)[52:68],
  group_5 = sort(andrew_species)[69:84])

Building Plots for Actual Abundance/Biomass

timeseries_plots <- map(even_groups, function(species_subset){
  
  species_subset_dat <- comparison_data %>% 
    filter(comname %in% species_subset) %>% 
    mutate(comname = str_to_title(comname),
           comname = fct_drop(comname),
           comname = fct_inorder(comname))
  
  
  # abundance plot
  abundance_plot <- ggplot(species_subset_dat, aes(est_year)) +
    geom_line(aes(y = abund_19, 
                  color = "Survdat Nye Allseason 2019", 
                  linetype = "Survdat Nye Allseason 2019"), 
              group = 1, alpha = 0.7) +
    geom_line(aes(y = abund_20, 
                  color = "Survdat Nye - August 2020",
                  linetype = "Survdat Nye - August 2020"), 
              group = 1, alpha = 0.7) +
    geom_line(aes(y = abund_21, 
                  color = "Newest Survdat",
                  linetype = "Newest Survdat"), 
              group = 1, alpha = 0.7) +
    scale_y_continuous(labels = scales::comma_format()) +
    scale_x_continuous(breaks = scales::pretty_breaks()) +
    scale_color_gmri() +
    facet_wrap(~comname, scales = "free") +
    labs(x  = "", y = "Annual Abundance", 
         color = "Survdat Retrieval", 
         linetype = "Survdat Retrieval") +
    theme(legend.position = "none")
  
  
  biomass_plot <- ggplot(species_subset_dat, aes(est_year)) +
    geom_line(aes(y = biom_19, 
                  color = "Survdat Nye Allseason 2019", 
                  linetype = "Survdat Nye Allseason 2019"), 
              group = 1, alpha = 0.7) +
    geom_line(aes(y = biom_20, 
                  color = "Survdat Nye - August 2020",
                  linetype = "Survdat Nye - August 2020"), 
              group = 1, alpha = 0.7) +
    geom_line(aes(y = biom_21, 
                  color = "Newest Survdat",
                  linetype = "Newest Survdat"), 
              group = 1, alpha = 0.7) +
    scale_y_continuous(labels = scales::comma_format()) +
    scale_x_continuous(breaks = scales::pretty_breaks()) +
    scale_color_gmri() +
    facet_wrap(~comname, scales = "free") +
    labs(x  = "", y = "Annual Biomass", 
         color = "Survdat Retrieval", 
         linetype = "Survdat Retrieval") +
    theme(legend.position = "bottom")
  
  return(list(a_plot = abundance_plot, b_plot = biomass_plot,
              stacked = (abundance_plot / biomass_plot)))
  
  
})

Building Plots for Differences in Abundance/Biomass

timeseries_diff_plots <- map(even_groups, function(species_subset){
  
  species_subset_dat <- comparison_data %>% 
    filter(comname %in% species_subset) %>% 
    mutate(comname = str_to_title(comname),
           comname = fct_drop(comname),
           comname = fct_inorder(comname))
  
  
  # abundance plot
  abundance_plot <- ggplot(species_subset_dat, aes(est_year)) +
    geom_line(aes(y = abund_change_19to21, 
                  color = "Abundance Change 2019 to 2021", 
                  linetype = "Abundance Change 2019 to 2021"), 
              group = 1, alpha = 0.7) +
    geom_line(aes(y = abund_change_20to21, 
                  color = "Abundance Change 2020 to 2021",
                  linetype = "Abundance Change 2020 to 2021"), 
              group = 1, alpha = 0.7) +
    scale_y_continuous(labels = scales::comma_format()) +
    scale_x_continuous(breaks = scales::pretty_breaks()) +
    scale_color_gmri() +
    facet_wrap(~comname, scales = "free") +
    labs(x  = "", y = "Difference in Annual Abundance", 
         color = "Survdat Retrieval Comparison", 
         linetype = "Survdat Retrieval Comparison") +
    theme(legend.position = "none")
  
  
  biomass_plot <- ggplot(species_subset_dat, aes(est_year)) +
    geom_line(aes(y = biom_change_19to21, 
                  color = "Biomass Change 2019 to 2021", 
                  linetype = "Biomass Change 2019 to 2021"), 
              group = 1, alpha = 0.7) +
    geom_line(aes(y = biom_change_20to21, 
                  color = "Biomass Change 2020 to 2021",
                  linetype = "Biomass Change 2020 to 2021"), 
              group = 1, alpha = 0.7) +
    scale_y_continuous(labels = scales::comma_format()) +
    scale_x_continuous(breaks = scales::pretty_breaks()) +
    scale_color_gmri() +
    facet_wrap(~comname, scales = "free") +
    labs(x  = "", y = "Difference in Annual Biomass", 
         color = "Survdat Retrieval Comparison", 
         linetype = "Survdat Retrieval Comparison") +
    theme(legend.position = "bottom")
  
  return(list(a_plot = abundance_plot, b_plot = biomass_plot,
              stacked = (abundance_plot / biomass_plot)))
  
  
})

Group 1

Observations

timeseries_plots$group_1$stacked

Differences

timeseries_diff_plots$group_1$stacked

Group 2

Observations

timeseries_plots$group_2$stacked

#### Differences

timeseries_diff_plots$group_2$stacked

Group 3

Observations

timeseries_plots$group_3$stacked

#### Differences

timeseries_diff_plots$group_3$stacked

Group 4

Observations

timeseries_plots$group_4$stacked

#### Differences

timeseries_diff_plots$group_4$stacked

Group 5

Observations

timeseries_plots$group_5$stacked

#### Differences

timeseries_diff_plots$group_5$stacked

Comparing 2019 to 2021

Abundance

Correlation

comparison_metrics %>% 
  mutate(comname = fct_reorder(comname, abund_corr_19to21, max, .desc = F)) %>% 
  ggplot(aes(x = abund_corr_19to21, y = comname)) +
    geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
    geom_point(color = gmri_cols("gmri blue")) +
    labs(y = "", x = "Correlation Between Total Annual Abundances")

Annual Percent Change

comparison_metrics %>% 
  mutate(comname = fct_reorder(comname, perc_abund_19to21, max, .desc = F)) %>% 
  ggplot(aes(x = perc_abund_19to21, y = comname)) +
    geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
    geom_point(color = gmri_cols("gmri blue")) +
    scale_x_continuous(labels = scales::percent_format()) +
    labs(y = "", x = "Avg. Percent Change in Annual Abundance\nMoving from Old Data -> New Data")

Haddock Spotlight

haddock_data <- comparison_data %>% 
  filter(comname == "haddock")

ab_abund <- haddock_data %>% 
  ggplot(aes(est_year)) +
  geom_line(aes(y = abund_19, color = "Survdat Nye Allseason 2019")) +
  geom_line(aes(y = abund_21, color = "Newest Survdat")) +
  scale_color_gmri() +
  scale_y_continuous(labels = scales::comma_format()) +
  scale_x_continuous(breaks = scales::pretty_breaks()) +
  labs(x = "", y = "Abundance", 
       subtitle = "Haddock Absolute Abundance", color = "") +
  theme(legend.position = "bottom")

p_change <- haddock_data %>% 
  ggplot(aes(est_year, abund_change_19to21)) +
  geom_line() +
  scale_x_continuous(breaks = scales::pretty_breaks()) +
  labs(y = "Percent Change in Abundance\n2019 -> 2021", x = "")

ab_abund / p_change

Lobster Spotlight

lob_data <- comparison_data %>% 
  filter(comname == "american lobster")

ab_abund <- lob_data %>% 
  ggplot(aes(est_year)) +
  geom_line(aes(y = abund_19, color = "Survdat Nye Allseason 2019")) +
  geom_line(aes(y = abund_21, color = "Newest Survdat")) +
  scale_color_gmri() +
  scale_y_continuous(labels = scales::comma_format()) +
  scale_x_continuous(breaks = scales::pretty_breaks()) +
  labs(x = "", y = "Abundance", 
       subtitle = "American Lobster Absolute Abundance", color = "") +
  theme(legend.position = "bottom")

p_change <- lob_data %>% 
  ggplot(aes(est_year, abund_change_19to21)) +
  geom_line() +
  scale_x_continuous(breaks = scales::pretty_breaks()) +
  labs(y = "Percent Change in Abundance\n2019 -> 2021", x = "")

ab_abund / p_change

Problem Species

Poor Correlation

# correlation
corr_cutoff <- 0.8
corr_species <- comparison_metrics %>% 
  filter(abund_corr_19to21 <= corr_cutoff) %>% 
   mutate(comname = fct_drop(comname)) %>% 
  pull(comname)

# filter species
corr_sub <- comparison_data %>% 
  filter(comname %in% corr_species) 

# how many species per plot
p1 <- corr_species[1:6]
p2 <- corr_species[-c(1:6)]

# p1
corr_sub %>% 
  filter(comname %in% p1) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  ggplot() +
  geom_line(aes(est_year, abund_19, color = "Survdat Nye Allseason 2019")) +
  geom_line(aes(est_year, abund_21, color = "Newest Survdat")) +
  facet_wrap(~comname, ncol = 1, scales = "free" ) +
  scale_y_continuous(labels = scales::comma_format()) +
  scale_x_continuous(breaks = scales::pretty_breaks()) +
  scale_color_gmri() +
  labs(x = "", 
       y = "Annual Abundance", 
       title = paste0("Species with Annual Abundance Correlation <= ", corr_cutoff),
       color = "Survdat Source",
       subtitle = "Subset 1")

# p1
corr_sub %>% 
  filter(comname %in% p2) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  ggplot() +
  geom_line(aes(est_year, abund_19, color = "Survdat Nye Allseason 2019")) +
  geom_line(aes(est_year, abund_21, color = "Newest Survdat")) +
  facet_wrap(~comname, ncol = 1, scales = "free" ) +
  scale_y_continuous(labels = scales::comma_format()) +
  scale_x_continuous(breaks = scales::pretty_breaks()) +
  scale_color_gmri() +
  labs(x = "", 
       y = "Annual Abundance", 
       title = paste0("Species with Annual Abundance Correlation <= ", corr_cutoff),
       color = "Survdat Source",
       subtitle = "Subset 2")

Large Percent Change

# percent change
perc_cutoff <- 25
perc_species <- comparison_metrics %>% 
  filter(perc_abund_19to21 >= perc_cutoff) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  pull(comname) 


comparison_data %>% 
  filter(comname %in% perc_species) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  ggplot() +
  geom_line(aes(est_year, abund_19, color = "Survdat Nye Allseason 2019")) +
  geom_line(aes(est_year, abund_21, color = "Newest Survdat")) +
  facet_wrap(~comname, ncol = 1, scales = "free") +
  scale_y_continuous(labels = scales::comma_format()) +
  scale_x_continuous(breaks = scales::pretty_breaks()) +
  scale_color_gmri() +
  labs(x = "", 
       y = "Annual Abundance",
       title = paste0("Species with Avg. Shift in Abundance >= ", perc_cutoff, "%"),
       color = "Survdat Source")

Biomass

Correlation

comparison_metrics %>% 
  mutate(comname = fct_reorder(comname, biom_corr_19to21, max, .desc = F)) %>% 
  ggplot(aes(x = biom_corr_19to21, y = comname)) +
    geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
    geom_point(color = gmri_cols("gmri blue")) +
    labs(y = "", x = "Correlation Between Total Annual Biomasses")

Annual Percent Change

comparison_metrics %>% 
  mutate(comname = fct_reorder(comname, perc_biom_19to21, max, .desc = F)) %>% 
  ggplot(aes(x = perc_biom_19to21, y = comname)) +
    geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
    geom_point(color = gmri_cols("gmri blue")) +
    labs(y = "", x = "Avg. Percent Change in Annual Biomasses\nMoving from Old Data -> New Data")

Haddock Spotlight

ab_biom <- haddock_data %>% 
  ggplot(aes(est_year)) +
  geom_line(aes(y = biom_19, color = "Survdat Nye Allseason 2019")) +
  geom_line(aes(y = biom_21, color = "Newest Survdat")) +
  scale_color_gmri() +
  scale_y_continuous(labels = scales::comma_format()) +
  labs(x = "", y = "Biomass", subtitle = "Haddock Absolute Biomass", color = "") +
  theme(legend.position = "bottom")

p_change <- haddock_data %>% 
  ggplot(aes(est_year, biom_change_19to21)) +
  geom_line() +
  labs(y = "Percent Change in Biomass\n2019 -> 2021", x = "")

ab_biom / p_change

Lobster Spotlight

ab_biom <- lob_data %>% 
  ggplot(aes(est_year)) +
  geom_line(aes(y = biom_19, color = "Survdat Nye Allseason 2019")) +
  geom_line(aes(y = biom_21, color = "Newest Survdat")) +
  scale_color_gmri() +
  scale_y_continuous(labels = scales::comma_format()) +
  labs(x = "", y = "Biomass", subtitle = "American Lobster Absolute Biomass", color = "") +
  theme(legend.position = "bottom")

p_change <- lob_data %>% 
  ggplot(aes(est_year, biom_change_19to21)) +
  geom_line() +
  labs(y = "Percent Change in Biomass\n2019 -> 2021", x = "")

ab_biom / p_change

Problem Species

Poor Correlation

# correlation
corr_cutoff <- 0.8
corr_species <- comparison_metrics %>% 
  filter(biom_corr_19to21 <= corr_cutoff) %>% 
   mutate(comname = fct_drop(comname)) %>% 
  pull(comname)

# plot
if(length(corr_species) > 0) {
comparison_data %>% 
  filter(comname %in% corr_species) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  ggplot() +
  geom_line(aes(est_year, biom_19, color = "Survdat Nye Allseason 2019")) +
  geom_line(aes(est_year, biom_21, color = "Newest Survdat")) +
  facet_wrap(~comname, ncol = 1, scales = "free" ) +
  scale_y_continuous(labels = scales::comma_format()) +
  scale_x_continuous(breaks = scales::pretty_breaks()) +
  scale_color_gmri() +
  labs(x = "", 
       y = "Annual Biomass", 
       title = paste0("Species with Annual Biomass Correlation <= ", corr_cutoff),
       color = "Survdat Source")
}

Large Percent Change

# percent change
perc_cutoff <- 25
perc_species <- comparison_metrics %>% 
  filter(perc_biom_19to21 >= perc_cutoff) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  pull(comname) 


comparison_data %>% 
  filter(comname %in% perc_species) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  ggplot() +
  geom_line(aes(est_year, biom_19, color = "Survdat Nye Allseason 2019")) +
  geom_line(aes(est_year, biom_21, color = "Newest Survdat")) +
  facet_wrap(~comname, ncol = 1, scales = "free") +
  scale_y_continuous(labels = scales::comma_format()) +
  scale_x_continuous(breaks = scales::pretty_breaks()) +
  scale_color_gmri() +
  labs(x = "", 
       y = "Annual Biomass",
       title = paste0("Species with Avg. Shift in Biomass >= ", perc_cutoff, "%"),
       color = "Survdat Source")

Comparing 2020 to 2021

Abundance

Correlation

comparison_metrics %>% 
  mutate(comname = fct_reorder(comname, abund_corr_20to21, max, .desc = F)) %>% 
  ggplot(aes(x = abund_corr_20to21, y = comname)) +
    geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
    geom_point(color = gmri_cols("gmri blue")) +
    labs(y = "", x = "Correlation Between Total Annual Abundances")

Annual Percent Change

comparison_metrics %>% 
  mutate(comname = fct_reorder(comname, perc_abund_20to21, max, .desc = F)) %>% 
  ggplot(aes(x = perc_abund_20to21, y = comname)) +
    geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
    geom_point(color = gmri_cols("gmri blue")) +
    scale_x_continuous(labels = scales::percent_format()) +
    labs(y = "", x = "Avg. Percent Change in Annual Abundance\nMoving from Old Data -> New Data")

Haddock Spotlight

ab_abund <- haddock_data %>% 
  ggplot(aes(est_year)) +
  geom_line(aes(y = abund_20, color = "Survdat Nye - August 202")) +
  geom_line(aes(y = abund_21, color = "Newest Survdat")) +
  scale_color_gmri() +
  scale_y_continuous(labels = scales::comma_format()) +
  scale_x_continuous(breaks = scales::pretty_breaks()) +
  labs(x = "", y = "Abundance", subtitle = "Haddock Absolute Abundance", color = "") +
  theme(legend.position = "bottom")

p_change <- haddock_data %>% 
  ggplot(aes(est_year, abund_change_20to21)) +
  geom_line() +
  scale_x_continuous(breaks = scales::pretty_breaks()) +
  labs(y = "Percent Change in Abundance\n2020 -> 2021", x = "")

ab_abund / p_change

Lobster Spotlight

lob_data <- comparison_data %>% 
  filter(comname == "american lobster")

ab_abund <- lob_data %>% 
  ggplot(aes(est_year)) +
  geom_line(aes(y = abund_20, color = "Survdat Nye - August 2020")) +
  geom_line(aes(y = abund_21, color = "Newest Survdat")) +
  scale_color_gmri() +
  scale_y_continuous(labels = scales::comma_format()) +
  scale_x_continuous(breaks = scales::pretty_breaks()) +
  labs(x = "", y = "Abundance", 
       subtitle = "American Lobster Absolute Abundance", color = "") +
  theme(legend.position = "bottom")

p_change <- lob_data %>% 
  ggplot(aes(est_year, abund_change_20to21)) +
  geom_line() +
  scale_x_continuous(breaks = scales::pretty_breaks()) +
  labs(y = "Percent Change in Abundance\n2020 -> 2021", x = "")

ab_abund / p_change

Problem Species

Poor Correlation

# correlation
corr_cutoff <- 0.8
corr_species <- comparison_metrics %>% 
  filter(abund_corr_20to21 <= corr_cutoff) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  pull(comname)


# filter species
corr_sub <- comparison_data %>% 
  filter(comname %in% corr_species) 


if(length(corr_species) != 0){
  
  
  # how many species per plot
p1 <- corr_species[1:6]
#p2 <- corr_species[-c(1:6)]

# p1
corr_sub %>% 
  filter(comname %in% p1) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  ggplot() +
  geom_line(aes(est_year, abund_20, color = "Survdat Nye - August 2020")) +
  geom_line(aes(est_year, abund_21, color = "Newest Survdat")) +
  facet_wrap(~comname, ncol = 1, scales = "free" ) +
  scale_y_continuous(labels = scales::comma_format()) +
  scale_x_continuous(breaks = scales::pretty_breaks()) +
  scale_color_gmri() +
  labs(x = "", 
       y = "Annual Abundance", 
       title = paste0("Species with Annual Abundance Correlation <= ", corr_cutoff),
       color = "Survdat Source",
       subtitle = "Subset 1")
  
}

Large Percent Change

# percent change
perc_cutoff <- 25
perc_species <- comparison_metrics %>% 
  filter(perc_abund_20to21 >= perc_cutoff) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  pull(comname) 


comparison_data %>% 
  filter(comname %in% perc_species) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  ggplot() +
  geom_line(aes(est_year, abund_20, color = "Survdat Nye - August 2020")) +
  geom_line(aes(est_year, abund_21, color = "Newest Survdat")) +
  facet_wrap(~comname, ncol = 1, scales = "free") +
  scale_y_continuous(labels = scales::comma_format()) +
  scale_x_continuous(breaks = scales::pretty_breaks()) +
  scale_color_gmri() +
  labs(x = "", 
       y = "Annual Abundance",
       title = paste0("Species with Avg. Shift in Abundance >= ", perc_cutoff, "%"),
       color = "Survdat Source")

Biomass

Correlation

comparison_metrics %>% 
  mutate(comname = fct_reorder(comname, biom_corr_20to21, max, .desc = F)) %>% 
  ggplot(aes(x = biom_corr_20to21, y = comname)) +
    geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
    geom_point(color = gmri_cols("gmri blue")) +
    labs(y = "", x = "Correlation Between Total Annual Biomasses")

Annual Percent Change

comparison_metrics %>% 
  mutate(comname = fct_reorder(comname, perc_biom_20to21, max, .desc = F)) %>% 
  ggplot(aes(x = perc_biom_20to21, y = comname)) +
    geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
    geom_point(color = gmri_cols("gmri blue")) +
    labs(y = "", x = "Avg. Percent Change in Annual Biomasses\nMoving from Old Data -> New Data")

Haddock Spotlight

ab_biom <- haddock_data %>% 
  ggplot(aes(est_year)) +
  geom_line(aes(y = biom_20, color = "Survdat Nye - August 2020")) +
  geom_line(aes(y = biom_21, color = "Newest Survdat")) +
  scale_color_gmri() +
  scale_y_continuous(labels = scales::comma_format()) +
  labs(x = "", y = "Biomass", subtitle = "Haddock Absolute Biomass", color = "") +
  theme(legend.position = "bottom")

p_change <- haddock_data %>% 
  ggplot(aes(est_year, biom_change_20to21)) +
  geom_line() +
  labs(y = "Percent Change in Biomass\n2020 -> 2021", x = "")

ab_biom / p_change

Lobster Spotlight

ab_biom <- lob_data %>% 
  ggplot(aes(est_year)) +
  geom_line(aes(y = biom_20, color = "Survdat Nye - August 2020")) +
  geom_line(aes(y = biom_21, color = "Newest Survdat")) +
  scale_color_gmri() +
  scale_y_continuous(labels = scales::comma_format()) +
  labs(x = "", y = "Biomass", subtitle = "American Lobster Absolute Biomass", color = "") +
  theme(legend.position = "bottom")

p_change <- lob_data %>% 
  ggplot(aes(est_year, biom_change_20to21)) +
  geom_line() +
  labs(y = "Percent Change in Biomass\n2020 -> 2021", x = "")

ab_biom / p_change

Problem Species

Poor Correlation

# correlation
corr_cutoff <- 0.8
corr_species <- comparison_metrics %>% 
  filter(biom_corr_20to21 <= corr_cutoff) %>% 
   mutate(comname = fct_drop(comname)) %>% 
  pull(comname)

# plot
if(length(corr_species) > 0){
comparison_data %>% 
  filter(comname %in% corr_species) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  ggplot() +
  geom_line(aes(est_year, biom_20, color = "Survdat Nye - August 2020")) +
  geom_line(aes(est_year, biom_21, color = "Newest Survdat")) +
  facet_wrap(~comname, ncol = 1, scales = "free" ) +
  scale_y_continuous(labels = scales::comma_format()) +
  scale_x_continuous(breaks = scales::pretty_breaks()) +
  scale_color_gmri() +
  labs(x = "", 
       y = "Annual Biomass", 
       title = paste0("Species with Annual Biomass Correlation <= ", corr_cutoff),
       color = "Survdat Source")
}

Large Percent Change

# percent change
perc_cutoff <- 25
perc_species <- comparison_metrics %>% 
  filter(perc_biom_20to21 >= perc_cutoff) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  pull(comname) 


comparison_data %>% 
  filter(comname %in% perc_species) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  ggplot() +
  geom_line(aes(est_year, biom_20, color = "Survdat Nye - August 2020")) +
  geom_line(aes(est_year, biom_21, color = "Newest Survdat")) +
  facet_wrap(~comname, ncol = 1, scales = "free") +
  scale_y_continuous(labels = scales::comma_format()) +
  scale_x_continuous(breaks = scales::pretty_breaks()) +
  scale_color_gmri() +
  labs(x = "", 
       y = "Annual Biomass",
       title = paste0("Species with Avg. Shift in Biomass >= ", perc_cutoff, "%"),
       color = "Survdat Source")

write_csv(comparison_data, here("data/survdat_biom_abund_compare.csv"))
 

A work by Adam A. Kemberling

Akemberling@gmri.org